1 Wprowadzenie

Niniejszy raport przedstawia analizę danych wykonaną w języku R. Zestaw danych, pochodzący z Protein Data Bank, dotyczy ligandów. Po wstępnym przetworzeniu danych pokazano rozkład liczby atomów i elektronów, ligandy o największej niezgodności liczby atomów i elektronów oraz korelację między niektórymi zmiennymi. Raport kończy się stworzeniem klasyfikatora, który na podstawie pozostałych parametrów próbuje przewidzieć nazwę ligandu.

2 Ładowanie bibliotek

library(data.table)
library(dplyr)
library(DT)
library(tidyr)
library(ggplot2)
library(plotly)
library(knitr)
library(reshape2)
library(kableExtra)
library(caret)
library(party)
# Global knitr chunk options: keep package messages and warnings out of the
# rendered report.
knitr::opts_chunk$set(warning = FALSE, message = FALSE)

3 Zapewnienie powtarzalności

# Fix the random number generator state so repeated runs of the report
# produce the same random draws.
set.seed(seed = 123)

4 Wczytywanie danych z pliku

# Load the ligand summary table; `nrows` caps the number of rows read from
# the CSV file at 300000.
data <- data.table::fread(input = "all_summary.csv", nrows = 300000)

5 Usuwanie niektórych wierszy

# Values of `res_name` whose rows must be excluded from the analysis.
# NOTE(review): "H20" (with a digit zero) looks like a typo for "H2O" --
# confirm against the data before changing it; the list is kept as it was.
to_remove <- c(
  "UNK", "UNX", "UNL", "DUM", "N", "BLOB",
  "ALA", "ARG", "ASN", "ASP", "CYS", "GLN", "GLU", "GLY", "HIS", "ILE", "LEU",
  "LYS", "MET", "MSE", "PHE", "PRO", "SEC", "SER", "THR", "TRP", "TYR", "VAL",
  "DA", "DG", "DT", "DC", "DU",
  "A", "G", "T", "C", "U",
  "HOH", "H20", "WAT"
)

# Build the working data set:
#   1. drop the column ranges listed in `select()`,
#   2. drop every row whose `res_name` appears in `to_remove`.
#
# The row filter must be a set-membership test. The previous predicate,
# `res_name != to_remove`, compared the column position by position with the
# recycled 41-element vector, so excluded names were kept whenever they did
# not happen to line up with the same name in the recycled vector, and valid
# rows were dropped when they did. `%in%` checks each `res_name` against the
# whole list.
#
# `!=` yields NA for a missing `res_name`, which `filter()` treats as "drop";
# `%in%` never returns NA, so the explicit `!is.na()` keeps that behaviour.
#
# Row counts obtained with the old predicate will differ after this fix.
data_cleared <- data %>%
  select(
    -(blob_coverage:pdb_code),
    -(res_id:local_res_atom_count),
    -(local_res_atom_non_h_occupancy_sum),
    -(local_res_atom_non_h_electron_occupancy_sum:local_res_atom_S_count),
    -(dict_atom_C_count:skeleton_periphery),
    -(local_max_over_std),
    -(local_cut_by_mainchain_volume:local_near_cut_count_N),
    -(fo_col:resolution_max_limit),
    -(part_step_FoFc_std_min:part_step_FoFc_std_step)
  ) %>%
  filter(!is.na(res_name), !(res_name %in% to_remove))

Powyższy kod usuwa wiersze z podanymi wartościami res_name oraz kolumny, które są nieopisane, nie są wykorzystywane do klasyfikacji lub zawierają dużo brakujących wartości.

6 Usuwanie wierszy z brakującymi wartościami

# Keep only complete rows: any row with a missing value in any column is
# removed.
data_final <- data_cleared %>%
  tidyr::drop_na()

7 Podsumowanie

Zbiór danych ma 268725 wierszy i 336 kolumn. Poniżej przedstawiona jest tabela zawierająca podstawowe statystyki.

# Dimensions of the cleaned data set (rows, columns).
dim(data_final)
## [1] 268725    336
# Per-column summary statistics rendered as a horizontally scrollable table.
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
knitr::kable(summary(data_final, digits = 2)) %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%")
res_name local_res_atom_non_h_count local_res_atom_non_h_electron_sum dict_atom_non_h_count dict_atom_non_h_electron_sum local_volume local_electrons local_mean local_std local_min local_max local_skewness part_00_shape_segments_count part_00_density_segments_count part_00_volume part_00_electrons part_00_mean part_00_std part_00_max part_00_max_over_std part_00_skewness part_00_parts part_00_shape_O3 part_00_shape_O4 part_00_shape_O5 part_00_shape_FL part_00_shape_O3_norm part_00_shape_O4_norm part_00_shape_O5_norm part_00_shape_FL_norm part_00_shape_I1 part_00_shape_I2 part_00_shape_I3 part_00_shape_I4 part_00_shape_I5 part_00_shape_I6 part_00_shape_I1_norm part_00_shape_I2_norm part_00_shape_I3_norm part_00_shape_I4_norm part_00_shape_I5_norm part_00_shape_I6_norm part_00_shape_M000 part_00_shape_CI part_00_shape_E3_E1 part_00_shape_E2_E1 part_00_shape_E3_E2 part_00_shape_sqrt_E1 part_00_shape_sqrt_E2 part_00_shape_sqrt_E3 part_00_density_O3 part_00_density_O4 part_00_density_O5 part_00_density_FL part_00_density_O3_norm part_00_density_O4_norm part_00_density_O5_norm part_00_density_FL_norm part_00_density_I1 part_00_density_I2 part_00_density_I3 part_00_density_I4 part_00_density_I5 part_00_density_I6 part_00_density_I1_norm part_00_density_I2_norm part_00_density_I3_norm part_00_density_I4_norm part_00_density_I5_norm part_00_density_I6_norm part_00_density_M000 part_00_density_CI part_00_density_E3_E1 part_00_density_E2_E1 part_00_density_E3_E2 part_00_density_sqrt_E1 part_00_density_sqrt_E2 part_00_density_sqrt_E3 part_00_shape_Z_7_3 part_00_shape_Z_0_0 part_00_shape_Z_7_0 part_00_shape_Z_7_1 part_00_shape_Z_3_0 part_00_shape_Z_5_2 part_00_shape_Z_6_1 part_00_shape_Z_3_1 part_00_shape_Z_6_0 part_00_shape_Z_2_1 part_00_shape_Z_6_3 part_00_shape_Z_2_0 part_00_shape_Z_6_2 part_00_shape_Z_5_0 part_00_shape_Z_5_1 part_00_shape_Z_4_2 part_00_shape_Z_1_0 part_00_shape_Z_4_1 part_00_shape_Z_7_2 part_00_shape_Z_4_0 part_00_density_Z_7_3 part_00_density_Z_0_0 
part_00_density_Z_7_0 part_00_density_Z_7_1 part_00_density_Z_3_0 part_00_density_Z_5_2 part_00_density_Z_6_1 part_00_density_Z_3_1 part_00_density_Z_6_0 part_00_density_Z_2_1 part_00_density_Z_6_3 part_00_density_Z_2_0 part_00_density_Z_6_2 part_00_density_Z_5_0 part_00_density_Z_5_1 part_00_density_Z_4_2 part_00_density_Z_1_0 part_00_density_Z_4_1 part_00_density_Z_7_2 part_00_density_Z_4_0 part_01_shape_segments_count part_01_density_segments_count part_01_volume part_01_electrons part_01_mean part_01_std part_01_max part_01_max_over_std part_01_skewness part_01_parts part_01_shape_O3 part_01_shape_O4 part_01_shape_O5 part_01_shape_FL part_01_shape_O3_norm part_01_shape_O4_norm part_01_shape_O5_norm part_01_shape_FL_norm part_01_shape_I1 part_01_shape_I2 part_01_shape_I3 part_01_shape_I4 part_01_shape_I5 part_01_shape_I6 part_01_shape_I1_norm part_01_shape_I2_norm part_01_shape_I3_norm part_01_shape_I4_norm part_01_shape_I5_norm part_01_shape_I6_norm part_01_shape_M000 part_01_shape_CI part_01_shape_E3_E1 part_01_shape_E2_E1 part_01_shape_E3_E2 part_01_shape_sqrt_E1 part_01_shape_sqrt_E2 part_01_shape_sqrt_E3 part_01_density_O3 part_01_density_O4 part_01_density_O5 part_01_density_FL part_01_density_O3_norm part_01_density_O4_norm part_01_density_O5_norm part_01_density_FL_norm part_01_density_I1 part_01_density_I2 part_01_density_I3 part_01_density_I4 part_01_density_I5 part_01_density_I6 part_01_density_I1_norm part_01_density_I2_norm part_01_density_I3_norm part_01_density_I4_norm part_01_density_I5_norm part_01_density_I6_norm part_01_density_M000 part_01_density_CI part_01_density_E3_E1 part_01_density_E2_E1 part_01_density_E3_E2 part_01_density_sqrt_E1 part_01_density_sqrt_E2 part_01_density_sqrt_E3 part_01_shape_Z_7_3 part_01_shape_Z_0_0 part_01_shape_Z_7_0 part_01_shape_Z_7_1 part_01_shape_Z_3_0 part_01_shape_Z_5_2 part_01_shape_Z_6_1 part_01_shape_Z_3_1 part_01_shape_Z_6_0 part_01_shape_Z_2_1 part_01_shape_Z_6_3 part_01_shape_Z_2_0 part_01_shape_Z_6_2 
part_01_shape_Z_5_0 part_01_shape_Z_5_1 part_01_shape_Z_4_2 part_01_shape_Z_1_0 part_01_shape_Z_4_1 part_01_shape_Z_7_2 part_01_shape_Z_4_0 part_01_density_Z_7_3 part_01_density_Z_0_0 part_01_density_Z_7_0 part_01_density_Z_7_1 part_01_density_Z_3_0 part_01_density_Z_5_2 part_01_density_Z_6_1 part_01_density_Z_3_1 part_01_density_Z_6_0 part_01_density_Z_2_1 part_01_density_Z_6_3 part_01_density_Z_2_0 part_01_density_Z_6_2 part_01_density_Z_5_0 part_01_density_Z_5_1 part_01_density_Z_4_2 part_01_density_Z_1_0 part_01_density_Z_4_1 part_01_density_Z_7_2 part_01_density_Z_4_0 part_02_shape_segments_count part_02_density_segments_count part_02_volume part_02_electrons part_02_mean part_02_std part_02_max part_02_max_over_std part_02_skewness part_02_parts part_02_shape_O3 part_02_shape_O4 part_02_shape_O5 part_02_shape_FL part_02_shape_O3_norm part_02_shape_O4_norm part_02_shape_O5_norm part_02_shape_FL_norm part_02_shape_I1 part_02_shape_I2 part_02_shape_I3 part_02_shape_I4 part_02_shape_I5 part_02_shape_I6 part_02_shape_I1_norm part_02_shape_I2_norm part_02_shape_I3_norm part_02_shape_I4_norm part_02_shape_I5_norm part_02_shape_I6_norm part_02_shape_M000 part_02_shape_CI part_02_shape_E3_E1 part_02_shape_E2_E1 part_02_shape_E3_E2 part_02_shape_sqrt_E1 part_02_shape_sqrt_E2 part_02_shape_sqrt_E3 part_02_density_O3 part_02_density_O4 part_02_density_O5 part_02_density_FL part_02_density_O3_norm part_02_density_O4_norm part_02_density_O5_norm part_02_density_FL_norm part_02_density_I1 part_02_density_I2 part_02_density_I3 part_02_density_I4 part_02_density_I5 part_02_density_I6 part_02_density_I1_norm part_02_density_I2_norm part_02_density_I3_norm part_02_density_I4_norm part_02_density_I5_norm part_02_density_I6_norm part_02_density_M000 part_02_density_CI part_02_density_E3_E1 part_02_density_E2_E1 part_02_density_E3_E2 part_02_density_sqrt_E1 part_02_density_sqrt_E2 part_02_density_sqrt_E3 part_02_shape_Z_7_3 part_02_shape_Z_0_0 part_02_shape_Z_7_0 
part_02_shape_Z_7_1 part_02_shape_Z_3_0 part_02_shape_Z_5_2 part_02_shape_Z_6_1 part_02_shape_Z_3_1 part_02_shape_Z_6_0 part_02_shape_Z_2_1 part_02_shape_Z_6_3 part_02_shape_Z_2_0 part_02_shape_Z_6_2 part_02_shape_Z_5_0 part_02_shape_Z_5_1 part_02_shape_Z_4_2 part_02_shape_Z_1_0 part_02_shape_Z_4_1 part_02_shape_Z_7_2 part_02_shape_Z_4_0 part_02_density_Z_7_3 part_02_density_Z_0_0 part_02_density_Z_7_0 part_02_density_Z_7_1 part_02_density_Z_3_0 part_02_density_Z_5_2 part_02_density_Z_6_1 part_02_density_Z_3_1 part_02_density_Z_6_0 part_02_density_Z_2_1 part_02_density_Z_6_3 part_02_density_Z_2_0 part_02_density_Z_6_2 part_02_density_Z_5_0 part_02_density_Z_5_1 part_02_density_Z_4_2 part_02_density_Z_1_0 part_02_density_Z_4_1 part_02_density_Z_7_2 part_02_density_Z_4_0 resolution FoFc_mean
FoFc_std FoFc_square_std FoFc_min FoFc_max
Length:268725 Min. : 1 Min. : 3 Min. : 1 Min. : 3 Min. : 93 Min. : 0.18 Min. :0.00063 Min. :0.0047 Min. :0 Min. : 0.04 Min. :0.0092 Min. : 1 Min. : 1 Min. : 0.82 Min. : 0.18 Min. :0.032 Min. :0.003 Min. : 0.04 Min. : 3.8 Min. :0.0023 Min. : 1.0 Min. :7.1e+02 Min. :1.4e+05 Min. :8.1e+06 Min. :2.0e+04 Min. : 0.23 Min. :0.018 Min. :0.00045 Min. :0.0e+00 Min. :7.0e+03 Min. :9.0e+06 Min. :1.7e+07 Min. :8.4e+03 Min. :6.0e+00 Min. :2.2e+06 Min. :6.4e-02 Min. :1.1e-03 Min. : 0 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. : 102 Min. :-1.3e+02 Min. :0.00009 Min. :0.00018 Min. :0.011 Min. : 1.9 Min. : 1.3 Min. : 0.85 Min. :3.1e+02 Min. :3.1e+04 Min. :9.2e+05 Min. :3.5e+03 Min. : 0.036 Min. :0.00042 Min. :1.7e-06 Min. :0.0e+00 Min. :4.2e+03 Min. :4.6e+06 Min. :3.8e+06 Min. :1.4e+03 Min. :2.0e+00 Min. :4.4e+05 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. : 23 Min. :-1.6e+02 Min. :8.1e-05 Min. :0.00016 Min. :0.013 Min. : 1.5 Min. : 1.2 Min. : 0.83 Min. : 7.4 Min. : 4.9 Min. : 0.85 Min. : 3.7 Min. : 0.85 Min. : 5.5 Min. : 2.2 Min. : 3.2 Min. : 0.024 Min. : 2.7 Min. : 5.5 Min. : 1.3 Min. : 4 Min. : 0.88 Min. : 3.9 Min. : 5.2 Min. :0.74 Min. : 3.3 Min. : 6.4 Min. : 0.03 Min. : 5.8 Min. : 2.3 Min. : 0.98 Min. : 3.1 Min. : 0.71 Min. : 3.9 Min. : 1.3 Min. : 2.0 Min. :7.1e-03 Min. : 2.9 Min. : 2.2 Min. : 2.4 Min. : 1.8 Min. : 0.87 Min. : 3.1 Min. : 2 Min. :0.68 Min. : 0.92 Min. : 5.1 Min. :7.4e-03 Min. : 1 Min. : 1 Min. : 0.54 Min. : 0.096 Min. :0.036 Min. :0.0017 Min. : 0.04 Min. : 3.8 Min. :0.0011 Min. : 1.0 Min. :3.0e+02 Min. :2.6e+04 Min. :6.9e+05 Min. :2.4e+03 Min. : 0.23 Min. : 0.018 Min. :0.00045 Min. :0.0e+00 Min. :1.7e+03 Min. :5.9e+05 Min. :1.0e+06 Min. :1.0e+03 Min. :3.0e+00 Min. :2.2e+05 Min. :6.3e-02 Min. :1.1e-03 Min. : 0 Min. :0.0e+00 Min. : 0.000 Min. :0.0e+00 Min. : 67 Min. :-1.4e+02 Min. :7.3e-05 Min. :0.00015 Min. :0.012 Min. : 1.4 Min. : 0.96 Min. : 0.69 Min. :1.1e+02 Min. :3.9e+03 Min. :4.1e+04 Min. :4.4e+02 Min. 
: 0.035 Min. :4.2e-04 Min. :1.6e-06 Min. :0.0e+00 Min. :9.2e+02 Min. :2.2e+05 Min. :1.8e+05 Min. :2.1e+02 Min. :1.0e+00 Min. :3.4e+04 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. : 12 Min. :-1.6e+02 Min. :6.7e-05 Min. :0.00013 Min. :0.012 Min. : 1.4 Min. : 0.94 Min. : 0.69 Min. : 6.2 Min. : 4 Min. : 0.71 Min. : 3.9 Min. : 0.63 Min. : 4.6 Min. : 1.3 Min. : 3.1 Min. : 0.018 Min. : 3.1 Min. : 4.2 Min. : 0.37 Min. : 3.1 Min. : 0.94 Min. : 3.2 Min. : 3.6 Min. :0.7 Min. : 2 Min. : 5 Min. : 0.021 Min. : 5.0 Min. : 1.7 Min. : 1.3 Min. : 3.1 Min. : 0.44 Min. : 3.7 Min. : 0.74 Min. : 2.4 Min. : 0.012 Min. : 1.6 Min. : 1.5 Min. : 0.75 Min. : 1.1 Min. : 0.9 Min. : 2.3 Min. : 1.6 Min. :0.62 Min. : 1 Min. : 4.4 Min. :5.1e-03 Min. : 0 Min. : 0 Min. : 0.26 Min. : 0.028 Min. :0.039 Min. :0.00025 Min. : 0.04 Min. : 3.8 Min. :0.00017 Min. : 1.0 Min. :7.2e+01 Min. :1.7e+03 Min. :1.2e+04 Min. :-6.1e+01 Min. : 0.22 Min. : 0.017 Min. :0.00037 Min. :0.0e+00 Min. :1.9e+02 Min. :9.3e+03 Min. :7.1e+03 Min. :-2.2e+01 Min. :0.0e+00 Min. :4.5e+03 Min. :5.7e-02 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. : 32 Min. :-1.5e+02 Min. :3.1e-05 Min. :4.9e-05 Min. :0.01 Min. : 0.87 Min. : 0.58 Min. : 0.42 Min. :1.0e+01 Min. :2.6e+01 Min. :2.0e+01 Min. :-1.5e+01 Min. : 0.035 Min. :4.2e-04 Min. :0.0e+00 Min. :0.0e+00 Min. :2.7e+01 Min. :1.8e+02 Min. :1.6e+02 Min. :-6.0e+00 Min. :0.0e+00 Min. :8.8e+01 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. :0.0e+00 Min. : 3.5 Min. :-1.7e+02 Min. :3.1e-05 Min. :4.9e-05 Min. :0.0099 Min. : 0.87 Min. : 0.58 Min. : 0.42 Min. : 5.8 Min. : 2.8 Min. : 0.91 Min. : 3.8 Min. : 0.66 Min. : 4 Min. : 0.97 Min. : 2.6 Min. :2.4e-03 Min. : 1.6 Min. : 3.2 Min. : 0.061 Min. : 2.2 Min. : 0.88 Min. : 2.7 Min. : 2.2 Min. :0.67 Min. : 0.88 Min. : 4.9 Min. :9.5e-03 Min. : 3.2 Min. : 0.92 Min. : 1.2 Min. : 2.0 Min. : 0.53 Min. : 2.3 Min. : 0.46 Min. : 2.0 Min. :5.7e-03 Min. : 0.78 Min. 
: 1.1 Min. : 0.029 Min. : 0.82 Min. : 0.64 Min. : 1.7 Min. : 0.78 Min. :0.61 Min. : 0.4 Min. : 2.6 Min. :6.4e-03 Min. :0.48 Min. :-1.1e-07 Min. :0.010 Min. :0.0001 Min. :-7.483 Min. : 0.048
Class :character 1st Qu.: 4 1st Qu.: 30 1st Qu.: 4 1st Qu.: 30 1st Qu.: 225 1st Qu.: 4.07 1st Qu.:0.01322 1st Qu.:0.0732 1st Qu.:0 1st Qu.: 0.61 1st Qu.:0.1289 1st Qu.: 5 1st Qu.: 5 1st Qu.: 7.81 1st Qu.: 4.05 1st Qu.:0.381 1st Qu.:0.074 1st Qu.: 0.61 1st Qu.: 5.6 1st Qu.:0.0650 1st Qu.: 1.0 1st Qu.:3.3e+04 1st Qu.:2.8e+08 1st Qu.:6.6e+11 1st Qu.:1.5e+09 1st Qu.: 0.27 1st Qu.:0.022 1st Qu.:0.00052 1st Qu.:6.4e-04 1st Qu.:1.6e+06 1st Qu.:3.9e+11 1st Qu.:8.7e+11 1st Qu.:7.8e+08 1st Qu.:1.1e+08 1st Qu.:2.3e+10 1st Qu.:9.8e-02 1st Qu.:1.9e-03 1st Qu.: 0 1st Qu.:3.0e-04 1st Qu.:4.0e-05 1st Qu.:1.0e-02 1st Qu.: 976 1st Qu.:-7.8e-01 1st Qu.:0.08866 1st Qu.:0.21994 1st Qu.:0.372 1st Qu.: 4.1 1st Qu.: 2.7 1st Qu.: 2.05 1st Qu.:1.6e+04 1st Qu.:6.7e+07 1st Qu.:7.6e+10 1st Qu.:3.1e+08 1st Qu.: 0.368 1st Qu.:0.03857 1st Qu.:1.2e-03 1st Qu.:1.8e-03 1st Qu.:7.4e+05 1st Qu.:8.6e+10 1st Qu.:2.0e+11 1st Qu.:1.7e+08 1st Qu.:3.4e+07 1st Qu.:5.4e+09 1st Qu.:2.0e-01 1st Qu.:7.5e-03 1st Qu.:0.0e+00 1st Qu.:9.0e-04 1st Qu.:2.0e-04 1st Qu.:0.0e+00 1st Qu.: 506 1st Qu.:-8.5e-01 1st Qu.:8.5e-02 1st Qu.:0.21494 1st Qu.:0.371 1st Qu.: 3.8 1st Qu.: 2.6 1st Qu.: 1.96 1st Qu.: 16.6 1st Qu.: 15.3 1st Qu.: 6.59 1st Qu.: 10.5 1st Qu.: 6.34 1st Qu.: 15.8 1st Qu.: 12.5 1st Qu.: 12.2 1st Qu.: 5.774 1st Qu.: 20.7 1st Qu.: 19.6 1st Qu.: 15.4 1st Qu.: 17 1st Qu.: 5.79 1st Qu.: 12.4 1st Qu.: 20.3 1st Qu.:1.26 1st Qu.: 17.0 1st Qu.: 14.1 1st Qu.: 8.67 1st Qu.: 11.4 1st Qu.: 11.0 1st Qu.: 6.05 1st Qu.: 7.8 1st Qu.: 4.74 1st Qu.: 10.9 1st Qu.: 8.3 1st Qu.: 8.2 1st Qu.:3.9e+00 1st Qu.: 15.4 1st Qu.: 13.1 1st Qu.: 12.0 1st Qu.: 11.7 1st Qu.: 5.31 1st Qu.: 9.0 1st Qu.: 16 1st Qu.:1.25 1st Qu.: 13.84 1st Qu.: 10.0 1st Qu.:7.9e+00 1st Qu.: 3 1st Qu.: 3 1st Qu.: 5.13 1st Qu.: 2.841 1st Qu.:0.418 1st Qu.:0.0616 1st Qu.: 0.61 1st Qu.: 5.6 1st Qu.:0.0525 1st Qu.: 1.0 1st Qu.:1.7e+04 1st Qu.:7.1e+07 1st Qu.:8.2e+10 1st Qu.:2.1e+08 1st Qu.: 0.26 1st Qu.: 0.021 1st Qu.:0.00049 1st Qu.:4.0e-04 1st Qu.:6.2e+05 1st 
Qu.:5.8e+10 1st Qu.:1.4e+11 1st Qu.:1.0e+08 1st Qu.:1.2e+07 1st Qu.:4.7e+09 1st Qu.:8.8e-02 1st Qu.:1.7e-03 1st Qu.: 0 1st Qu.:2.0e-04 1st Qu.: 0.000 1st Qu.:1.0e-02 1st Qu.: 641 1st Qu.:-6.3e-01 1st Qu.:8.0e-02 1st Qu.:0.20797 1st Qu.:0.378 1st Qu.: 3.6 1st Qu.: 2.35 1st Qu.: 1.78 1st Qu.:9.3e+03 1st Qu.:2.1e+07 1st Qu.:1.3e+10 1st Qu.:5.2e+07 1st Qu.: 0.343 1st Qu.:3.4e-02 1st Qu.:9.8e-04 1st Qu.:1.0e-03 1st Qu.:3.3e+05 1st Qu.:1.6e+10 1st Qu.:3.8e+10 1st Qu.:2.7e+07 1st Qu.:4.5e+06 1st Qu.:1.4e+09 1st Qu.:1.7e-01 1st Qu.:6.0e-03 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.: 355 1st Qu.:-6.7e-01 1st Qu.:7.8e-02 1st Qu.:0.20445 1st Qu.:0.379 1st Qu.: 3.4 1st Qu.: 2.24 1st Qu.: 1.72 1st Qu.: 13.1 1st Qu.: 12 1st Qu.: 6.59 1st Qu.: 8.7 1st Qu.: 4.93 1st Qu.: 12.0 1st Qu.: 9.9 1st Qu.: 9.6 1st Qu.: 4.625 1st Qu.: 16.5 1st Qu.: 15.3 1st Qu.: 11.98 1st Qu.: 13.3 1st Qu.: 5.41 1st Qu.: 9.2 1st Qu.: 15.6 1st Qu.:1.3 1st Qu.: 13 1st Qu.: 11 1st Qu.: 6.500 1st Qu.: 9.8 1st Qu.: 9.2 1st Qu.: 6.2 1st Qu.: 7.5 1st Qu.: 4.14 1st Qu.: 8.9 1st Qu.: 6.43 1st Qu.: 6.9 1st Qu.: 3.150 1st Qu.: 12.8 1st Qu.: 10.3 1st Qu.: 9.70 1st Qu.: 8.9 1st Qu.: 5.2 1st Qu.: 7.2 1st Qu.: 12.1 1st Qu.:1.28 1st Qu.: 10 1st Qu.: 8.6 1st Qu.:5.5e+00 1st Qu.: 3 1st Qu.: 3 1st Qu.: 3.10 1st Qu.: 1.832 1st Qu.:0.453 1st Qu.:0.04870 1st Qu.: 0.61 1st Qu.: 5.6 1st Qu.:0.04018 1st Qu.: 1.0 1st Qu.:7.5e+03 1st Qu.:1.3e+07 1st Qu.:6.5e+09 1st Qu.: 1.4e+07 1st Qu.: 0.25 1st Qu.: 0.020 1st Qu.:0.00047 1st Qu.:0.0e+00 1st Qu.:1.8e+05 1st Qu.:5.2e+09 1st Qu.:1.1e+10 1st Qu.: 6.5e+06 1st Qu.:5.9e+05 1st Qu.:5.9e+08 1st Qu.:8.1e-02 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.: 387 1st Qu.:-4.5e-01 1st Qu.:7.7e-02 1st Qu.:2.1e-01 1st Qu.:0.40 1st Qu.: 2.94 1st Qu.: 1.95 1st Qu.: 1.51 1st Qu.:4.4e+03 1st Qu.:4.4e+06 1st Qu.:1.3e+09 1st Qu.: 4.5e+06 1st Qu.: 0.318 1st Qu.:3.0e-02 1st Qu.:8.2e-04 1st Qu.:0.0e+00 1st Qu.:1.1e+05 1st Qu.:1.7e+09 1st 
Qu.:3.7e+09 1st Qu.: 2.2e+06 1st Qu.:3.0e+05 1st Qu.:2.0e+08 1st Qu.:1.4e-01 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.:0.0e+00 1st Qu.: 229.0 1st Qu.:-4.7e-01 1st Qu.:7.6e-02 1st Qu.:2.0e-01 1st Qu.:0.4007 1st Qu.: 2.79 1st Qu.: 1.88 1st Qu.: 1.47 1st Qu.: 10.9 1st Qu.: 9.6 1st Qu.: 6.77 1st Qu.: 8.4 1st Qu.: 4.42 1st Qu.: 9 1st Qu.: 7.18 1st Qu.: 7.1 1st Qu.:3.4e+00 1st Qu.: 12.4 1st Qu.: 11.0 1st Qu.: 8.760 1st Qu.: 9.4 1st Qu.: 5.51 1st Qu.: 7.4 1st Qu.: 11.3 1st Qu.:1.34 1st Qu.: 8.90 1st Qu.: 9.7 1st Qu.:4.6e+00 1st Qu.: 9.3 1st Qu.: 7.39 1st Qu.: 6.5 1st Qu.: 7.7 1st Qu.: 4.13 1st Qu.: 7.7 1st Qu.: 5.02 1st Qu.: 5.8 1st Qu.:2.4e+00 1st Qu.: 10.11 1st Qu.: 7.7 1st Qu.: 7.401 1st Qu.: 6.60 1st Qu.: 5.36 1st Qu.: 6.7 1st Qu.: 8.40 1st Qu.:1.33 1st Qu.: 6.8 1st Qu.: 8.6 1st Qu.:3.3e+00 1st Qu.:1.80 1st Qu.:-4.6e-11 1st Qu.:0.092 1st Qu.:0.0085 1st Qu.:-0.857 1st Qu.: 1.186
Mode :character Median : 6 Median : 48 Median : 6 Median : 48 Median : 364 Median : 8.61 Median :0.01961 Median :0.1030 Median :0 Median : 0.94 Median :0.1821 Median : 26 Median : 26 Median : 15.41 Median : 8.53 Median :0.532 Median :0.131 Median : 0.94 Median : 7.6 Median :0.1211 Median : 1.0 Median :1.1e+05 Median :3.0e+09 Median :2.3e+13 Median :5.5e+10 Median : 0.38 Median :0.034 Median :0.00079 Median :5.7e-03 Median :9.5e+06 Median :1.3e+13 Median :3.7e+13 Median :3.1e+10 Median :9.5e+09 Median :4.7e+11 Median :2.3e-01 Median :6.8e-03 Median : 0 Median :3.2e-03 Median :1.1e-03 Median :4.0e-02 Median : 1926 Median : 2.1e-04 Median :0.17421 Median :0.39055 Median :0.577 Median : 6.0 Median : 3.6 Median : 2.67 Median :5.4e+04 Median :7.3e+08 Median :2.7e+12 Median :1.3e+10 Median : 0.586 Median :0.08264 Median :2.9e-03 Median :1.8e-02 Median :4.2e+06 Median :2.6e+12 Median :7.4e+12 Median :7.7e+09 Median :3.1e+09 Median :1.0e+11 Median :5.4e-01 Median :4.0e-02 Median :0.0e+00 Median :1.1e-02 Median :4.6e-03 Median :0.0e+00 Median : 1066 Median : 1.1e-04 Median :1.7e-01 Median :0.39101 Median :0.581 Median : 5.7 Median : 3.4 Median : 2.51 Median : 28.1 Median : 21.4 Median : 10.72 Median : 18.7 Median : 11.25 Median : 26.0 Median : 22.0 Median : 19.0 Median : 10.389 Median : 29.9 Median : 32.9 Median : 22.8 Median : 29 Median : 12.95 Median : 21.6 Median : 32.9 Median :1.40 Median : 28.3 Median : 24.7 Median : 15.73 Median : 20.8 Median : 16.0 Median : 9.26 Median : 15.0 Median : 8.27 Median : 18.9 Median : 18.4 Median : 13.2 Median :8.8e+00 Median : 22.5 Median : 25.1 Median : 17.9 Median : 23.2 Median : 10.48 Median : 16.3 Median : 25 Median :1.38 Median : 22.15 Median : 18.8 Median :1.4e+01 Median : 17 Median : 17 Median : 11.35 Median : 6.868 Median :0.582 Median :0.1181 Median : 0.94 Median : 7.6 Median :0.1070 Median : 1.0 Median :6.6e+04 Median :1.1e+09 Median :4.9e+12 Median :1.5e+10 Median : 0.37 Median : 0.032 Median :0.00073 Median :5.0e-03 Median 
:4.6e+06 Median :3.1e+12 Median :8.7e+12 Median :8.2e+09 Median :2.4e+09 Median :1.4e+11 Median :2.2e-01 Median :6.0e-03 Median : 0 Median :2.9e-03 Median : 0.001 Median :4.0e-02 Median : 1419 Median : 8.0e-05 Median :1.8e-01 Median :0.39537 Median :0.597 Median : 5.4 Median : 3.25 Median : 2.42 Median :3.7e+04 Median :3.4e+08 Median :8.4e+11 Median :4.0e+09 Median : 0.553 Median :7.4e-02 Median :2.6e-03 Median :1.3e-02 Median :2.4e+06 Median :8.3e+11 Median :2.2e+12 Median :2.4e+09 Median :9.5e+08 Median :3.9e+10 Median :4.8e-01 Median :3.2e-02 Median :0.0e+00 Median :8.0e-03 Median :3.0e-03 Median :0.0e+00 Median : 859 Median : 3.0e-05 Median :1.8e-01 Median :0.39667 Median :0.600 Median : 5.1 Median : 3.04 Median : 2.29 Median : 23.0 Median : 18 Median : 8.94 Median : 15.1 Median : 9.48 Median : 21.5 Median : 17.8 Median : 16.1 Median : 8.631 Median : 25.1 Median : 26.9 Median : 18.90 Median : 23.8 Median : 10.47 Median : 17.5 Median : 26.9 Median :1.5 Median : 23 Median : 20 Median : 12.364 Median : 17.6 Median : 14.3 Median : 7.8 Median : 12.4 Median : 7.29 Median : 16.2 Median : 14.93 Median : 11.7 Median : 6.802 Median : 20.0 Median : 21.1 Median : 15.62 Median : 19.2 Median : 8.8 Median : 13.7 Median : 21.4 Median :1.47 Median : 19 Median : 15.6 Median :1.1e+01 Median : 10 Median : 10 Median : 8.28 Median : 5.415 Median :0.631 Median :0.10491 Median : 0.94 Median : 7.6 Median :0.09301 Median : 1.0 Median :3.9e+04 Median :3.8e+08 Median :1.0e+12 Median : 3.1e+09 Median : 0.33 Median : 0.027 Median :0.00061 Median :3.0e-03 Median :2.1e+06 Median :6.8e+11 Median :1.8e+12 Median : 1.7e+09 Median :3.4e+08 Median :3.7e+10 Median :1.7e-01 Median :0.0e+00 Median :0.0e+00 Median :1.0e-03 Median :0.0e+00 Median :0.0e+00 Median : 1035 Median : 5.0e-05 Median :2.0e-01 Median :4.2e-01 Median :0.62 Median : 4.62 Median : 2.89 Median : 2.19 Median :2.4e+04 Median :1.4e+08 Median :2.3e+11 Median : 1.0e+09 Median : 0.502 Median :6.4e-02 Median :2.2e-03 Median :1.0e-02 
Median :1.3e+06 Median :2.4e+11 Median :6.0e+11 Median : 5.8e+08 Median :1.6e+08 Median :1.4e+10 Median :3.8e-01 Median :0.0e+00 Median :0.0e+00 Median :0.0e+00 Median :0.0e+00 Median :0.0e+00 Median : 676.9 Median : 1.0e-05 Median :2.1e-01 Median :4.2e-01 Median :0.6239 Median : 4.33 Median : 2.72 Median : 2.08 Median : 18.2 Median : 15.7 Median : 8.40 Median : 12.0 Median : 7.46 Median : 17 Median : 14.03 Median : 13.4 Median :6.9e+00 Median : 20.8 Median : 21.4 Median : 15.390 Median : 18.7 Median : 7.76 Median : 13.7 Median : 21.3 Median :1.59 Median : 17.66 Median : 15.6 Median :9.4e+00 Median : 14.2 Median : 12.71 Median : 8.0 Median : 10.2 Median : 6.11 Median : 13.4 Median : 11.06 Median : 10.2 Median :5.1e+00 Median : 17.45 Median : 16.6 Median : 13.383 Median : 14.85 Median : 7.34 Median : 11.1 Median : 17.77 Median :1.59 Median : 15.4 Median : 12.5 Median :8.8e+00 Median :2.05 Median : 9.2e-13 Median :0.124 Median :0.0154 Median :-0.675 Median : 1.903
NA Mean : 13 Mean : 97 Mean : 13 Mean : 100 Mean : 897 Mean : 18.78 Mean :0.02477 Mean :0.1282 Mean :0 Mean : 1.42 Mean :0.2326 Mean : 355 Mean : 355 Mean : 34.68 Mean : 18.57 Mean :0.621 Mean :0.223 Mean : 1.42 Mean : 10.2 Mean :0.2298 Mean : 1.1 Mean :1.8e+06 Mean :1.2e+13 Mean :2.3e+20 Mean :4.8e+16 Mean : 0.49 Mean :0.062 Mean :0.00203 Mean :5.7e-02 Mean :3.6e+09 Mean :2.2e+20 Mean :1.6e+23 Mean :3.1e+16 Mean :1.9e+16 Mean :2.5e+18 Mean :5.5e-01 Mean :9.1e-02 Mean : 22 Mean :3.9e-02 Mean :2.6e-02 Mean :7.4e-01 Mean : 4335 Mean : 4.7e-02 Mean :0.24863 Mean :0.43057 Mean :0.557 Mean : 8.2 Mean : 4.6 Mean : 3.03 Mean :8.5e+05 Mean :1.5e+12 Mean :1.7e+18 Mean :2.7e+15 Mean : 0.730 Mean :0.14206 Mean :7.4e-03 Mean :2.8e-01 Mean :1.1e+09 Mean :8.4e+18 Mean :1.1e+21 Mean :1.7e+15 Mean :1.0e+15 Mean :3.2e+16 Mean :1.8e+00 Mean :6.9e-01 Mean :2.1e+04 Mean :2.1e-01 Mean :1.7e-01 Mean :7.3e+01 Mean : 2321 Mean : 4.8e-02 Mean :2.5e-01 Mean :0.43089 Mean :0.559 Mean : 7.9 Mean : 4.3 Mean : 2.86 Mean : 42.4 Mean : 27.1 Mean : 17.89 Mean : 29.1 Mean : 15.56 Mean : 36.2 Mean : 32.7 Mean : 25.2 Mean : 15.317 Mean : 39.6 Mean : 48.1 Mean : 29.1 Mean : 43 Mean : 18.88 Mean : 29.9 Mean : 45.2 Mean :1.42 Mean : 38.9 Mean : 37.7 Mean : 21.21 Mean : 31.5 Mean : 19.9 Mean : 15.38 Mean : 23.2 Mean :11.73 Mean : 26.6 Mean : 25.8 Mean : 18.0 Mean :1.3e+01 Mean : 29.2 Mean : 35.6 Mean : 22.6 Mean : 32.8 Mean : 15.47 Mean : 22.8 Mean : 34 Mean :1.41 Mean : 29.74 Mean : 28.7 Mean :1.8e+01 Mean : 298 Mean : 298 Mean : 26.83 Mean : 16.035 Mean :0.677 Mean :0.2128 Mean : 1.42 Mean : 10.2 Mean :0.2166 Mean : 1.3 Mean :1.4e+06 Mean :6.2e+12 Mean :6.4e+19 Mean :2.3e+16 Mean : 0.53 Mean : 0.073 Mean :0.00262 Mean :1.3e-01 Mean :2.7e+09 Mean :9.3e+19 Mean :1.0e+23 Mean :1.5e+16 Mean :9.0e+15 Mean :1.6e+18 Mean :7.4e-01 Mean :2.1e-01 Mean : 46 Mean :1.0e-01 Mean : 0.084 Mean :1.3e+00 Mean : 3354 Mean : 4.1e-02 Mean :2.6e-01 Mean :0.43148 Mean :0.566 Mean : 7.7 Mean : 4.16 Mean : 2.76 Mean :7.1e+05 
Mean :1.1e+12 Mean :9.6e+17 Mean :1.9e+15 Mean : 0.753 Mean :1.5e-01 Mean :8.1e-03 Mean :6.1e-01 Mean :9.4e+08 Mean :5.6e+18 Mean :8.3e+20 Mean :1.2e+15 Mean :7.7e+14 Mean :2.3e+16 Mean :2.3e+00 Mean :1.6e+00 Mean :3.8e+04 Mean :5.4e-01 Mean :4.8e-01 Mean :1.2e+02 Mean : 2004 Mean : 4.2e-02 Mean :2.6e-01 Mean :0.43219 Mean :0.568 Mean : 7.4 Mean : 3.97 Mean : 2.62 Mean : 37.0 Mean : 23 Mean : 16.33 Mean : 25.7 Mean : 13.83 Mean : 31.3 Mean : 28.4 Mean : 22.1 Mean : 13.608 Mean : 33.9 Mean : 41.6 Mean : 24.79 Mean : 37.4 Mean : 16.85 Mean : 25.8 Mean : 38.8 Mean :1.5 Mean : 33 Mean : 33 Mean : 18.103 Mean : 28.9 Mean : 18.1 Mean : 14.6 Mean : 21.4 Mean :10.99 Mean : 24.3 Mean : 23.05 Mean : 16.6 Mean : 11.866 Mean : 26.4 Mean : 32.2 Mean : 20.21 Mean : 29.4 Mean : 14.4 Mean : 20.7 Mean : 30.1 Mean :1.50 Mean : 26 Mean : 26.2 Mean :1.6e+01 Mean : 249 Mean : 249 Mean : 20.75 Mean : 13.723 Mean :0.731 Mean :0.20179 Mean : 1.42 Mean : 10.2 Mean :0.20337 Mean : 1.4 Mean :1.0e+06 Mean :3.3e+12 Mean :2.0e+19 Mean : 1.1e+16 Mean : 0.57 Mean : 0.087 Mean :0.00349 Mean :3.7e-01 Mean :2.0e+09 Mean :4.3e+19 Mean :6.4e+22 Mean : 7.2e+15 Mean :4.5e+15 Mean :9.7e+17 Mean :1.1e+00 Mean :9.7e-01 Mean :3.5e+02 Mean :3.4e-01 Mean :3.1e-01 Mean :4.6e+00 Mean : 2594 Mean : 3.6e-02 Mean :2.7e-01 Mean :4.4e-01 Mean :0.58 Mean : 7.03 Mean : 3.76 Mean : 2.49 Mean :5.9e+05 Mean :7.6e+11 Mean :5.7e+17 Mean : 1.4e+15 Mean : 0.771 Mean :1.6e-01 Mean :9.5e-03 Mean :2.8e+00 Mean :7.7e+08 Mean :3.8e+18 Mean :5.8e+20 Mean : 8.9e+14 Mean :5.7e+14 Mean :1.7e+16 Mean :3.8e+00 Mean :1.6e+01 Mean :3.0e+05 Mean :3.0e+00 Mean :3.1e+00 Mean :5.0e+02 Mean : 1715.4 Mean : 3.4e-02 Mean :2.7e-01 Mean :4.4e-01 Mean :0.5825 Mean : 6.80 Mean : 3.60 Mean : 2.38 Mean : 32.2 Mean : 19.9 Mean : 15.05 Mean : 22.8 Mean : 12.26 Mean : 27 Mean : 24.12 Mean : 19.1 Mean :1.2e+01 Mean : 28.7 Mean : 35.5 Mean : 20.821 Mean : 31.7 Mean : 15.09 Mean : 22.3 Mean : 32.7 Mean :1.66 Mean : 27.72 Mean : 28.5 Mean :1.5e+01 Mean : 
26.6 Mean : 16.21 Mean : 14.0 Mean : 19.9 Mean :10.30 Mean : 22.1 Mean : 20.31 Mean : 15.3 Mean :1.1e+01 Mean : 23.50 Mean : 28.6 Mean : 17.850 Mean : 25.98 Mean : 13.49 Mean : 18.8 Mean : 26.57 Mean :1.65 Mean : 23.1 Mean : 24.0 Mean :1.4e+01 Mean :2.13 Mean : 3.3e-11 Mean :0.130 Mean :0.0198 Mean :-0.708 Mean : 2.678
NA 3rd Qu.: 17 3rd Qu.: 121 3rd Qu.: 18 3rd Qu.: 128 3rd Qu.: 835 3rd Qu.: 20.89 3rd Qu.:0.02965 3rd Qu.:0.1480 3rd Qu.:0 3rd Qu.: 1.58 3rd Qu.:0.2650 3rd Qu.: 155 3rd Qu.: 155 3rd Qu.: 36.66 3rd Qu.: 20.23 3rd Qu.:0.737 3rd Qu.:0.247 3rd Qu.: 1.58 3rd Qu.: 11.8 3rd Qu.:0.2498 3rd Qu.: 1.0 3rd Qu.:6.7e+05 3rd Qu.:9.3e+10 3rd Qu.:3.0e+15 3rd Qu.:7.1e+12 3rd Qu.: 0.61 3rd Qu.:0.070 3rd Qu.:0.00182 3rd Qu.:3.2e-02 3rd Qu.:1.6e+08 3rd Qu.:2.8e+15 3rd Qu.:1.2e+16 3rd Qu.:4.3e+12 3rd Qu.:1.8e+12 3rd Qu.:5.4e+13 3rd Qu.:5.9e-01 3rd Qu.:3.6e-02 3rd Qu.: 0 3rd Qu.:2.0e-02 3rd Qu.:9.6e-03 3rd Qu.:2.0e-01 3rd Qu.: 4583 3rd Qu.: 8.3e-01 3rd Qu.:0.36817 3rd Qu.:0.62528 3rd Qu.:0.748 3rd Qu.: 10.3 3rd Qu.: 5.4 3rd Qu.: 3.54 3rd Qu.:3.0e+05 3rd Qu.:1.8e+10 3rd Qu.:2.5e+14 3rd Qu.:1.5e+12 3rd Qu.: 0.940 3rd Qu.:0.17312 3rd Qu.:7.3e-03 3rd Qu.:1.2e-01 3rd Qu.:6.8e+07 3rd Qu.:5.1e+14 3rd Qu.:2.4e+15 3rd Qu.:1.0e+12 3rd Qu.:5.3e+11 3rd Qu.:1.1e+13 3rd Qu.:1.5e+00 3rd Qu.:2.2e-01 3rd Qu.:1.0e+00 3rd Qu.:8.3e-02 3rd Qu.:4.7e-02 3rd Qu.:1.0e+00 3rd Qu.: 2529 3rd Qu.: 9.0e-01 3rd Qu.:3.8e-01 3rd Qu.:0.63050 3rd Qu.:0.754 3rd Qu.: 10.0 3rd Qu.: 5.1 3rd Qu.: 3.30 3rd Qu.: 55.2 3rd Qu.: 33.1 3rd Qu.: 22.93 3rd Qu.: 38.2 3rd Qu.: 20.23 3rd Qu.: 46.6 3rd Qu.: 43.7 3rd Qu.: 31.7 3rd Qu.: 19.967 3rd Qu.: 49.5 3rd Qu.: 63.2 3rd Qu.: 36.5 3rd Qu.: 58 3rd Qu.: 25.26 3rd Qu.: 38.9 3rd Qu.: 58.7 3rd Qu.:1.56 3rd Qu.: 51.1 3rd Qu.: 49.4 3rd Qu.: 28.20 3rd Qu.: 40.9 3rd Qu.: 24.6 3rd Qu.: 19.73 3rd Qu.: 30.3 3rd Qu.:15.02 3rd Qu.: 34.4 3rd Qu.: 34.9 3rd Qu.: 22.7 3rd Qu.:1.8e+01 3rd Qu.: 36.0 3rd Qu.: 46.9 3rd Qu.: 28.5 3rd Qu.: 43.6 3rd Qu.: 20.29 3rd Qu.: 29.7 3rd Qu.: 43 3rd Qu.:1.55 3rd Qu.: 38.40 3rd Qu.: 37.5 3rd Qu.:2.4e+01 3rd Qu.: 106 3rd Qu.: 106 3rd Qu.: 27.44 3rd Qu.: 17.681 3rd Qu.:0.804 3rd Qu.:0.2358 3rd Qu.: 1.58 3rd Qu.: 11.8 3rd Qu.:0.2347 3rd Qu.: 1.0 3rd Qu.:4.3e+05 3rd Qu.:3.7e+10 3rd Qu.:7.5e+14 3rd Qu.:2.7e+12 3rd Qu.: 0.67 3rd Qu.: 0.080 3rd Qu.:0.00208 3rd Qu.:4.7e-02 3rd 
Qu.:8.9e+07 3rd Qu.:8.6e+14 3rd Qu.:4.1e+15 3rd Qu.:1.8e+12 3rd Qu.:8.3e+11 3rd Qu.:2.0e+13 3rd Qu.:7.5e-01 3rd Qu.:5.0e-02 3rd Qu.: 0 3rd Qu.:3.1e-02 3rd Qu.: 0.016 3rd Qu.:2.8e-01 3rd Qu.: 3430 3rd Qu.: 6.7e-01 3rd Qu.:4.0e-01 3rd Qu.:0.63990 3rd Qu.:0.762 3rd Qu.: 9.7 3rd Qu.: 4.94 3rd Qu.: 3.28 3rd Qu.:2.1e+05 3rd Qu.:9.2e+09 3rd Qu.:9.3e+13 3rd Qu.:7.5e+11 3rd Qu.: 0.962 3rd Qu.:1.7e-01 3rd Qu.:7.0e-03 3rd Qu.:1.4e-01 3rd Qu.:4.4e+07 3rd Qu.:2.0e+14 3rd Qu.:1.0e+15 3rd Qu.:5.2e+11 3rd Qu.:2.9e+11 3rd Qu.:5.0e+12 3rd Qu.:1.6e+00 3rd Qu.:2.3e-01 3rd Qu.:1.0e+00 3rd Qu.:1.0e-01 3rd Qu.:6.1e-02 3rd Qu.:1.0e+00 3rd Qu.: 2210 3rd Qu.: 7.0e-01 3rd Qu.:4.1e-01 3rd Qu.:0.64446 3rd Qu.:0.767 3rd Qu.: 9.4 3rd Qu.: 4.68 3rd Qu.: 3.07 3rd Qu.: 48.1 3rd Qu.: 29 3rd Qu.: 20.38 3rd Qu.: 33.5 3rd Qu.: 18.06 3rd Qu.: 40.6 3rd Qu.: 37.9 3rd Qu.: 27.9 3rd Qu.: 17.616 3rd Qu.: 42.6 3rd Qu.: 54.9 3rd Qu.: 31.34 3rd Qu.: 49.7 3rd Qu.: 22.40 3rd Qu.: 33.7 3rd Qu.: 50.5 3rd Qu.:1.7 3rd Qu.: 44 3rd Qu.: 43 3rd Qu.: 24.236 3rd Qu.: 37.5 3rd Qu.: 23.0 3rd Qu.: 18.1 3rd Qu.: 27.7 3rd Qu.:14.11 3rd Qu.: 31.6 3rd Qu.: 31.76 3rd Qu.: 21.2 3rd Qu.: 16.326 3rd Qu.: 33.0 3rd Qu.: 43.0 3rd Qu.: 26.23 3rd Qu.: 39.6 3rd Qu.: 18.7 3rd Qu.: 27.2 3rd Qu.: 39.3 3rd Qu.:1.69 3rd Qu.: 35 3rd Qu.: 34.1 3rd Qu.:2.2e+01 3rd Qu.: 71 3rd Qu.: 71 3rd Qu.: 20.67 3rd Qu.: 15.309 3rd Qu.:0.870 3rd Qu.:0.22478 3rd Qu.: 1.58 3rd Qu.: 11.8 3rd Qu.:0.21934 3rd Qu.: 1.0 3rd Qu.:2.6e+05 3rd Qu.:1.4e+10 3rd Qu.:1.8e+14 3rd Qu.: 9.7e+11 3rd Qu.: 0.71 3rd Qu.: 0.086 3rd Qu.:0.00222 3rd Qu.:5.7e-02 3rd Qu.:4.8e+07 3rd Qu.:2.3e+14 3rd Qu.:1.2e+15 3rd Qu.: 6.4e+11 3rd Qu.:3.1e+11 3rd Qu.:6.6e+12 3rd Qu.:8.5e-01 3rd Qu.:6.0e-02 3rd Qu.:0.0e+00 3rd Qu.:3.9e-02 3rd Qu.:2.1e-02 3rd Qu.:3.0e-01 3rd Qu.: 2584 3rd Qu.: 4.7e-01 3rd Qu.:4.3e-01 3rd Qu.:6.6e-01 3rd Qu.:0.78 3rd Qu.: 9.05 3rd Qu.: 4.48 3rd Qu.: 3.04 3rd Qu.:1.5e+05 3rd Qu.:4.5e+09 3rd Qu.:3.3e+13 3rd Qu.: 3.4e+11 3rd Qu.: 0.948 3rd Qu.:1.6e-01 3rd Qu.:6.3e-03 3rd 
Qu.:1.4e-01 3rd Qu.:2.7e+07 3rd Qu.:7.1e+13 3rd Qu.:3.9e+14 3rd Qu.: 2.4e+11 3rd Qu.:1.4e+11 3rd Qu.:2.1e+12 3rd Qu.:1.6e+00 3rd Qu.:2.0e-01 3rd Qu.:1.0e+00 3rd Qu.:1.0e-01 3rd Qu.:6.0e-02 3rd Qu.:1.0e+00 3rd Qu.: 1913.7 3rd Qu.: 4.9e-01 3rd Qu.:4.4e-01 3rd Qu.:6.6e-01 3rd Qu.:0.7801 3rd Qu.: 8.81 3rd Qu.: 4.25 3rd Qu.: 2.85 3rd Qu.: 41.3 3rd Qu.: 24.8 3rd Qu.: 17.66 3rd Qu.: 28.8 3rd Qu.: 15.90 3rd Qu.: 35 3rd Qu.: 31.94 3rd Qu.: 24.3 3rd Qu.:1.5e+01 3rd Qu.: 36.4 3rd Qu.: 46.9 3rd Qu.: 26.736 3rd Qu.: 42.1 3rd Qu.: 19.47 3rd Qu.: 28.9 3rd Qu.: 43.1 3rd Qu.:1.88 3rd Qu.: 36.91 3rd Qu.: 36.5 3rd Qu.:2.0e+01 3rd Qu.: 33.8 3rd Qu.: 21.37 3rd Qu.: 16.2 3rd Qu.: 24.8 3rd Qu.:13.03 3rd Qu.: 28.7 3rd Qu.: 28.47 3rd Qu.: 19.5 3rd Qu.:1.4e+01 3rd Qu.: 30.10 3rd Qu.: 38.9 3rd Qu.: 24.036 3rd Qu.: 35.62 3rd Qu.: 16.97 3rd Qu.: 24.5 3rd Qu.: 35.61 3rd Qu.:1.88 3rd Qu.: 31.3 3rd Qu.: 30.5 3rd Qu.:1.9e+01 3rd Qu.:2.40 3rd Qu.: 5.1e-11 3rd Qu.:0.161 3rd Qu.:0.0258 3rd Qu.:-0.508 3rd Qu.: 3.175
NA Max. :106 Max. :1223 Max. :126 Max. :1223 Max. :90953 Max. :442.44 Max. :0.37088 Max. :1.9596 Max. :0 Max. :43.44 Max. :4.0352 Max. :114577 Max. :114577 Max. :2427.94 Max. :441.14 Max. :8.597 Max. :7.483 Max. :43.44 Max. :173.3 Max. :9.5676 Max. :28.0 Max. :2.3e+09 Max. :4.0e+17 Max. :2.9e+25 Max. :4.6e+21 Max. :33.57 Max. :6.011 Max. :0.41105 Max. :1.6e+02 Max. :1.6e+14 Max. :2.6e+25 Max. :2.7e+28 Max. :2.8e+21 Max. :1.5e+21 Max. :3.7e+23 Max. :2.0e+03 Max. :1.9e+02 Max. :3892018 Max. :1.6e+02 Max. :1.8e+02 Max. :6.6e+04 Max. :303493 Max. : 7.0e+01 Max. :0.99422 Max. :1.00000 Max. :1.000 Max. :202.8 Max. :34.5 Max. :19.93 Max. :3.7e+08 Max. :8.7e+15 Max. :4.2e+22 Max. :2.0e+20 Max. :186.777 Max. :9.53689 Max. :1.8e+00 Max. :1.3e+03 Max. :1.3e+13 Max. :7.8e+23 Max. :1.6e+26 Max. :1.1e+20 Max. :4.5e+19 Max. :2.3e+21 Max. :6.1e+04 Max. :2.2e+03 Max. :3.7e+09 Max. :1.3e+03 Max. :1.3e+03 Max. :1.1e+07 Max. :55142 Max. : 9.0e+01 Max. :1.0e+00 Max. :1.00000 Max. :1.000 Max. :202.5 Max. :32.8 Max. :19.38 Max. :558.7 Max. :269.2 Max. :366.99 Max. :446.1 Max. :208.13 Max. :455.1 Max. :476.2 Max. :297.3 Max. :299.013 Max. :420.8 Max. :608.4 Max. :326.5 Max. :562 Max. :315.57 Max. :407.5 Max. :534.5 Max. :2.24 Max. :465.6 Max. :530.3 Max. :313.09 Max. :204.7 Max. :114.7 Max. :121.89 Max. :155.7 Max. :88.54 Max. :171.2 Max. :165.8 Max. :120.9 Max. :1.2e+02 Max. :176.3 Max. :260.1 Max. :135.3 Max. :228.2 Max. :118.23 Max. :156.8 Max. :228 Max. :2.25 Max. :181.06 Max. :186.7 Max. :1.2e+02 Max. :69202 Max. :69202 Max. :1996.25 Max. :395.695 Max. :8.857 Max. :7.7455 Max. :43.44 Max. :173.3 Max. :9.7427 Max. :24.0 Max. :1.8e+09 Max. :1.4e+17 Max. :5.9e+24 Max. :1.9e+21 Max. :38.43 Max. :11.751 Max. :1.23161 Max. :5.5e+02 Max. :1.3e+14 Max. :8.0e+24 Max. :1.7e+28 Max. :9.6e+20 Max. :4.4e+20 Max. :2.4e+23 Max. :2.6e+03 Max. :1.0e+03 Max. :6717223 Max. :6.1e+02 Max. :640.948 Max. :1.0e+05 Max. :249531 Max. : 6.8e+01 Max. :9.9e-01 Max. :1.00000 Max. :1.000 Max. :202.4 Max. 
:32.06 Max. :19.06 Max. :3.3e+08 Max. :6.8e+15 Max. :2.7e+22 Max. :1.5e+20 Max. :205.735 Max. :1.4e+01 Max. :3.1e+00 Max. :4.7e+03 Max. :1.1e+13 Max. :6.2e+23 Max. :1.2e+26 Max. :8.4e+19 Max. :3.8e+19 Max. :1.7e+21 Max. :7.4e+04 Max. :3.0e+04 Max. :5.5e+09 Max. :4.7e+03 Max. :4.7e+03 Max. :1.5e+07 Max. :49462 Max. : 1.0e+02 Max. :1.0e+00 Max. :1.00000 Max. :1.000 Max. :202.2 Max. :30.77 Max. :18.69 Max. :470.3 Max. :244 Max. :291.56 Max. :371.0 Max. :191.76 Max. :403.3 Max. :371.7 Max. :260.7 Max. :263.618 Max. :346.4 Max. :483.6 Max. :284.60 Max. :451.6 Max. :295.23 Max. :368.1 Max. :422.5 Max. :3.2 Max. :375 Max. :447 Max. :275.172 Max. :195.0 Max. :108.7 Max. :111.8 Max. :147.5 Max. :86.32 Max. :163.5 Max. :158.59 Max. :117.4 Max. :117.454 Max. :170.9 Max. :252.0 Max. :128.13 Max. :218.4 Max. :112.8 Max. :147.4 Max. :221.7 Max. :3.25 Max. :175 Max. :182.2 Max. :1.2e+02 Max. :45564 Max. :45564 Max. :1632.54 Max. :351.187 Max. :8.973 Max. :7.95879 Max. :43.44 Max. :173.3 Max. :9.94605 Max. :26.0 Max. :1.5e+09 Max. :5.9e+16 Max. :2.0e+24 Max. : 1.2e+21 Max. :68.88 Max. :21.693 Max. :6.41781 Max. :3.4e+03 Max. :1.1e+14 Max. :5.3e+24 Max. :1.1e+28 Max. : 6.5e+20 Max. :2.6e+20 Max. :1.6e+23 Max. :8.4e+03 Max. :4.8e+04 Max. :7.0e+07 Max. :3.4e+03 Max. :3.4e+03 Max. :5.8e+05 Max. :204067 Max. : 1.1e+02 Max. :1.0e+00 Max. :1.0e+00 Max. :1.00 Max. :201.81 Max. :29.65 Max. :18.22 Max. :3.0e+08 Max. :5.2e+15 Max. :1.7e+22 Max. : 1.2e+20 Max. :382.977 Max. :1.2e+02 Max. :1.1e+02 Max. :2.0e+05 Max. :9.4e+12 Max. :4.8e+23 Max. :8.8e+25 Max. : 6.7e+19 Max. :3.2e+19 Max. :1.2e+21 Max. :2.6e+05 Max. :2.2e+06 Max. :6.7e+10 Max. :2.6e+05 Max. :3.1e+05 Max. :9.9e+07 Max. :43898.3 Max. : 1.2e+02 Max. :9.9e-01 Max. :1.0e+00 Max. :0.9999 Max. :201.71 Max. :28.75 Max. :17.68 Max. :414.4 Max. :220.7 Max. :228.84 Max. :319.7 Max. :195.16 Max. :345 Max. :322.34 Max. :262.0 Max. :1.9e+02 Max. :315.9 Max. :441.0 Max. :256.750 Max. :402.7 Max. :241.83 Max. :300.4 Max. :379.8 Max. :4.57 Max. 
:345.04 Max. :377.5 Max. :2.1e+02 Max. :189.1 Max. :102.37 Max. :107.1 Max. :141.5 Max. :83.38 Max. :156.1 Max. :153.30 Max. :113.0 Max. :1.2e+02 Max. :165.28 Max. :243.0 Max. :120.557 Max. :208.21 Max. :105.93 Max. :136.5 Max. :214.21 Max. :4.58 Max. :168.9 Max. :176.8 Max. :1.2e+02 Max. :7.60 Max. : 2.6e-07 Max. :0.797 Max. :0.6360 Max. :-0.043 Max. :43.352

8 Ograniczenie zakresu

Poniższy kod ogranicza liczbę wierszy, pozostawiając tylko te, których wartość atrybutu res_name należy do 50 najczęściej występujących.

# Count the rows of each res_name, order the classes from the most to the
# least frequent and keep the first 50 of them.
res_name_counts <- summarize(group_by(data_final, res_name), ilosc = n())
top_50 <- head(arrange(res_name_counts, desc(ilosc)), 50)

# Keep only the rows whose res_name is one of those 50 classes.
data_final_50 <- filter(data_final, res_name %in% top_50$res_name)

9 Korelacja między zmiennymi

Poniżej przedstawiono korelację pomiędzy niektórymi zmiennymi.

# Columns from local_res_atom_non_h_count through local_skewness, without
# local_min, are the inputs of the correlation matrix.
cor_grouped <- select(
  data_final_50,
  local_res_atom_non_h_count:local_skewness,
  -local_min
)

# Reshape the square correlation matrix to long form (Var1, Var2, value) and
# draw it as a tile heat map with vertical x-axis labels.
cor_matrix <- cor(cor_grouped)
cor_data <- melt(cor_matrix)
ggplot(data = cor_data, mapping = aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

10 Liczebność 50 najpopularniejszych klas

# Render the class counts as a left-aligned, striped table with a fixed-width
# first column.
top_50_table <- knitr::kable(top_50, align = "l")
top_50_table <- kable_styling(
  top_50_table,
  bootstrap_options = "striped",
  full_width = FALSE
)
column_spec(top_50_table, column = 1, width = "2.5cm")
res_name ilosc
SO4 28354
GOL 19163
EDO 14530
CL 11443
NAG 11432
CA 10528
ZN 9989
MG 6957
HEM 5305
PO4 5287
ACT 3769
IOD 3374
DMS 3364
NAD 2556
K 2317
FAD 2248
PEG 2221
MN 2075
ADP 1822
NAP 1720
CD 1696
MLY 1676
UNX 1479
MPD 1380
FMT 1328
MES 1326
PG4 1325
CU 1140
MAN 1106
BR 1101
ATP 1056
FMN 1050
COA 1049
1PE 1020
EPE 965
CLA 939
NDP 912
NI 898
NO3 880
SF4 863
ACY 813
TRS 805
SAH 801
GDP 798
PGE 760
PLP 751
HEC 747
FE 734
CIT 719
FE2 694

11 Wykres rozkładu liczby atomów

# Histogram of local_res_atom_non_h_count, filled by res_name, displayed as an
# interactive plotly widget.
plot_atom <- ggplot(
  data = data_final_50,
  mapping = aes(x = local_res_atom_non_h_count, fill = res_name)
) +
  geom_histogram()
ggplotly(plot_atom)

12 Wykres rozkładu liczby elektronów

# Histogram of local_res_atom_non_h_electron_sum, filled by res_name, displayed
# as an interactive plotly widget.
plot_elect <- ggplot(
  data = data_final_50,
  mapping = aes(x = local_res_atom_non_h_electron_sum, fill = res_name)
) +
  geom_histogram()
ggplotly(plot_elect)

13 Niezgodność liczby atomów

Tabela w tej sekcji przedstawia 10 klas z największą niezgodnością liczby atomów.

# Per class, sum the (signed) difference between the dictionary atom count and
# the locally observed atom count.
atom_diff_by_class <- summarize(
  group_by(data_final_50, res_name),
  Niezgodnosc_atomow = sum(dict_atom_non_h_count - local_res_atom_non_h_count)
)

# Keep the 10 classes with the largest summed difference.
niezg_atom <- head(arrange(atom_diff_by_class, desc(Niezgodnosc_atomow)), 10)

# Render the atom-count discrepancy ranking as a striped table with a
# fixed-width first column.
niezg_atom_table <- knitr::kable(niezg_atom)
niezg_atom_table <- kable_styling(
  niezg_atom_table,
  bootstrap_options = "striped",
  full_width = FALSE
)
column_spec(niezg_atom_table, column = 1, width = "2.5cm")
res_name Niezgodnosc_atomow
NAG 11195
CLA 3052
1PE 2749
MLY 2343
NAP 2182
NAD 1833
COA 1593
PG4 1239
MAN 986
NDP 763

14 Niezgodność liczby elektronów

Tabela w tej sekcji przedstawia 10 klas z największą niezgodnością liczby elektronów.

# Per class, sum the (signed) difference between the dictionary electron sum
# and the locally observed electron sum.
electron_diff_by_class <- summarize(
  group_by(data_final_50, res_name),
  Niezgodnosc_elektronow = sum(
    dict_atom_non_h_electron_sum - local_res_atom_non_h_electron_sum
  )
)

# Keep the 10 classes with the largest summed difference.
niezg_elect <- head(
  arrange(electron_diff_by_class, desc(Niezgodnosc_elektronow)),
  10
)

# Render the electron-count discrepancy ranking as a striped table with a
# fixed-width first column.
niezg_elect_table <- knitr::kable(niezg_elect)
niezg_elect_table <- kable_styling(
  niezg_elect_table,
  bootstrap_options = "striped",
  full_width = FALSE
)
column_spec(niezg_elect_table, column = 1, width = "2.5cm")
res_name Niezgodnosc_elektronow
NAG 89528
1PE 18734
CLA 18586
MLY 17441
NAP 14817
NAD 12302
COA 11817
PG4 8372
MAN 7888
PLP 5232

15 Wykresy part_01

Poniższe wykresy prezentują rozkład wartości kolumn ‘part_01…’ z podziałem na klasy res_name.

# res_name plus the contiguous run of columns from
# part_01_shape_segments_count to part_01_density_Z_4_0.
data_part <- select(
  data_final_50,
  res_name,
  part_01_shape_segments_count:part_01_density_Z_4_0
)

# The same columns without the class label; their names, as a list, drive the
# per-column plots.
data_part_01 <- select(data_part, -res_name)
names_list <- as.list(names(data_part_01))

# Scatter plot of one column of `data_part` against a grouping column, with
# the per-group mean drawn as a black cross and printed as a rotated label.
#
# y.axis: name (string) of the `data_part` column shown on the y axis.
# x.axis: name (string) of the `data_part` column used for the x axis and for
#         the point colour.
# Returns the ggplot object.
#
# The `.data` pronoun, `fun` and `after_stat()` replace the deprecated
# `aes_string()`, `fun.y` and `..y..` (requires ggplot2 >= 3.3.0).
plot.fn.with.aes <- function(y.axis, x.axis) {
  # The aesthetics are evaluated when the plot is built, so force the
  # arguments now to pin their values.
  force(y.axis)
  force(x.axis)

  ggplot(
    data_part,
    aes(
      x = .data[[x.axis]],
      y = .data[[y.axis]],
      color = .data[[x.axis]]
    )
  ) +
    geom_point() +
    theme(axis.text.x = element_blank()) +
    # Keep the plain column names as axis and legend titles, exactly as
    # aes_string() produced them.
    labs(x = x.axis, y = y.axis, colour = x.axis) +
    stat_summary(
      fun = mean, colour = "black", geom = "point",
      shape = 4, size = 2, show.legend = FALSE
    ) +
    stat_summary(
      fun = mean, colour = "black", geom = "text", show.legend = FALSE,
      hjust = -0.2, size = 2.6, angle = 90,
      aes(label = round(after_stat(y), digits = 1))
    )
}

# Build one plot per part_01 column, always grouped by res_name; the resulting
# list is printed by the report.
lapply(X = names_list, FUN = plot.fn.with.aes, x.axis = "res_name")

16 Regresja

Przeprowadzenie regresji oraz wyliczenie miar R^2 i RMSE dla przewidywania liczby atomów:

# Linear regression predicting local_res_atom_non_h_count from all other
# columns: 60% of the rows train the model, the rest evaluate it.
idx <- createDataPartition(
  y = data_final_50[["local_res_atom_non_h_count"]],
  p = 0.6,
  list = FALSE
)
training <- data_final_50[idx, ]
testing <- data_final_50[-idx, ]

# 2-fold cross-validation, repeated 5 times.
ctrl <- trainControl(method = "repeatedcv", number = 2, repeats = 5)

lm <- train(
  local_res_atom_non_h_count ~ .,
  data = training,
  method = "lm",
  metric = "RMSE",
  trControl = ctrl
)

# Score the held-out rows (RMSE, R squared, MAE).
lmp <- predict(lm, newdata = testing)
postResample(pred = lmp, obs = testing[["local_res_atom_non_h_count"]])
##       RMSE   Rsquared        MAE 
## 0.09388563 0.99994728 0.01515412

Przeprowadzenie regresji oraz wyliczenie miar R^2 i RMSE dla przewidywania liczby elektronów:

# Linear regression predicting local_res_atom_non_h_electron_sum from all other
# columns: 60% of the rows train the model, the rest evaluate it.
idx <- createDataPartition(
  y = data_final_50[["local_res_atom_non_h_electron_sum"]],
  p = 0.6,
  list = FALSE
)
training <- data_final_50[idx, ]
testing <- data_final_50[-idx, ]

# 2-fold cross-validation, repeated 5 times.
ctrl <- trainControl(method = "repeatedcv", number = 2, repeats = 5)

lm <- train(
  local_res_atom_non_h_electron_sum ~ .,
  data = training,
  method = "lm",
  metric = "RMSE",
  trControl = ctrl
)

# Score the held-out rows (RMSE, R squared, MAE).
lmp <- predict(lm, newdata = testing)
postResample(pred = lmp, obs = testing[["local_res_atom_non_h_electron_sum"]])
##      RMSE  Rsquared       MAE 
## 1.6640206 0.9996398 0.1031926

17 Klasyfikator

Tworzenie klasyfikatora przewidującego wartość atrybutu res_name:

# Train a random-forest classifier that predicts res_name from all remaining
# columns and evaluate it on held-out rows.

# 60% of the rows, partitioned on the class label, go to training.
idx <- createDataPartition(data_final_50$res_name, p = .6, list = FALSE)

training <- data_final_50[idx, ]

testing <- data_final_50[-idx, ]

# 2-fold cross-validation, repeated 5 times.
ctrl <- trainControl(method = "repeatedcv", number = 2, repeats = 5)

# ntree = 4 is passed through train() to the random-forest fit.
classifier <- train(as.factor(res_name) ~ .,
                    data = training,
                    method = "rf",
                    trControl = ctrl,
                    ntree = 4)

predictions <- predict(classifier, newdata = testing)

# Take the reference labels by column name rather than by position: `[, 1]`
# only yields a vector for a plain data.frame whose first column is res_name.
# The levels are aligned with the predictions so both factors share one
# level set.
reference_labels <- factor(testing[["res_name"]],
                           levels = levels(predictions))

results <- confusionMatrix(data = predictions,
                           reference = reference_labels)

knitr::kable(results$overall)
x
Accuracy 0.9641502
Kappa 0.9616800
AccuracyLower 0.9627634
AccuracyUpper 0.9654993
AccuracyNull 0.1581994
AccuracyPValue 0.0000000
McnemarPValue NaN